import os
import pandas as pd
import numpy as np
import torch
import dill as pickle
import statistics
import math
import plotly
import plotly.graph_objs as go
import plotly.express as px
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import bamt.Networks as Nets
import bamt.Nodes as Nodes
import bamt.Preprocessors as pp
from pgmpy.estimators import K2Score
from tedeous.solver import grid_format_prepare
from IPython.core.display import SVG
from IPython import display
import warnings
warnings.filterwarnings('ignore')
$u(t)$ - a function representing the population of prey;
$v(t)$ - a function representing the population of predators;
$\alpha$ - the birth rate coefficient of prey;
$\beta$ - the predation rate coefficient of prey;
$\gamma$ - the mortality rate coefficient of predators;
$\delta$ - the reproductive capacity coefficient.
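With this notation, the classic Lotka-Volterra predator-prey model, whose structure the algorithm is expected to recover below, reads:
\begin{equation*}
\begin{cases}
\dfrac{\partial u}{\partial t} = \alpha \cdot u - \beta \cdot u \cdot v, \\
\dfrac{\partial v}{\partial t} = - \gamma \cdot v + \delta \cdot u \cdot v.
\end{cases}
\end{equation*}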
Source: https://github.com/stan-dev/example-models/blob/master/knitr/lotka-volterra/hudson-bay-lynx-hare.csv
Data: Annual information on the populations of lynxes and hares from 1900 to 1920 (21 rows)
source = pd.read_csv('hudson-bay-lynx-hare.csv', sep=', ', engine='python').values
t = source[:, 0]
x = source[:, 1] # Lynx/Hunters
y = source[:, 2] # Hare/Prey
source_data = [y, x]
fig = go.Figure()
fig.add_trace(go.Scatter(x=t, y=y, name='Source data - u (Hare)', line=dict(color='firebrick', width=2)))
fig.add_trace(go.Scatter(x=t, y=x, name='Source data - v (Lynx)', line=dict(color='black', width=2)))
fig.update_layout(xaxis_title="Time, t [Years]", yaxis_title="Population")
fig.show()
The multi-objective evolutionary optimization algorithm yields a Pareto-optimal set of mutually non-dominated equations. The algorithm is run multiple times on the same data, and the resulting equations and their coefficients are collected into a table whose structure is shown in the figure below.
SVG(filename='info.svg')
The columns of the table correspond to the terms obtained during the discovery of the differential equations; each row contains the coefficient of every term for one discovered system.
This structure is needed to construct a joint distribution and determine which terms occur together most frequently. A Bayesian network built on this data allows analyzing the interdependencies and relationships among the terms of the system.
Preprocessing includes removing zero columns/rows, analyzing how frequently specific terms occur, and assessing the diversity of the obtained equations.
path = 'numerical_results/'
df = pd.read_csv(f'{path}output_main_hunter_prey_lynx_hare.csv', index_col = 'Unnamed: 0', sep='\t', encoding='utf-8')
df.shape
(211, 21)
df.head()
| | C_u | v{power: 1.0} * u{power: 1.0}_u | u{power: 1.0}_u | v{power: 1.0}_u | v{power: 1.0} * dv/dx1{power: 1.0}_u | u{power: 1.0} * du/dx1{power: 1.0}_u | dv/dx1{power: 1.0}_u | du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u | du/dx1{power: 1.0} * v{power: 1.0}_u | du/dx1{power: 1.0}_r_u | ... | v{power: 1.0}_v | v{power: 1.0} * u{power: 1.0}_v | u{power: 1.0}_v | dv/dx1{power: 1.0} * v{power: 1.0}_v | dv/dx1{power: 1.0} * u{power: 1.0}_v | du/dx1{power: 1.0}_v | dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v | du/dx1{power: 1.0} * u{power: 1.0}_v | dv/dx1{power: 1.0}_r_v | dv/dx1{power: 1.0} * u{power: 1.0}_r_v |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.065667 | -0.027988 | 0.546189 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | ... | -0.837908 | 0.026033 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | 0.0 |
| 1 | 17.890508 | 0.000000 | 0.000000 | -0.906833 | -0.011958 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | ... | -0.837908 | 0.026033 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | 0.0 |
| 2 | 0.065667 | -0.027988 | 0.546189 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | ... | -0.837908 | 0.026033 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | 0.0 |
| 3 | 17.890508 | 0.000000 | 0.000000 | -0.906833 | -0.011958 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | ... | -0.837908 | 0.026033 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | 0.0 |
| 4 | 0.065667 | -0.027988 | 0.546189 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | ... | -0.837908 | 0.026033 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | 0.0 |
5 rows × 21 columns
df = df.round(decimals=10)  # round all coefficient columns
# Deleting rows if all coefficients on the left side are zero and the coefficient on the right side is -1 (based on the sum of row elements).
df = df.loc[(df.sum(axis=1) != -2), (df.sum(axis=0) != 0)] # for system
# Removing zero columns
df = df.loc[:, (df != 0).any(axis=0)]
# Count of nonzero values in columns
(df != 0).sum(axis = 0)
C_u 211
v{power: 1.0} * u{power: 1.0}_u 164
u{power: 1.0}_u 162
v{power: 1.0}_u 40
v{power: 1.0} * dv/dx1{power: 1.0}_u 7
u{power: 1.0} * du/dx1{power: 1.0}_u 29
dv/dx1{power: 1.0}_u 9
du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u 6
du/dx1{power: 1.0} * v{power: 1.0}_u 5
du/dx1{power: 1.0}_r_u 211
C_v 211
v{power: 1.0}_v 159
v{power: 1.0} * u{power: 1.0}_v 155
u{power: 1.0}_v 46
dv/dx1{power: 1.0} * v{power: 1.0}_v 37
dv/dx1{power: 1.0} * u{power: 1.0}_v 8
du/dx1{power: 1.0}_v 7
dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v 8
du/dx1{power: 1.0} * u{power: 1.0}_v 2
dv/dx1{power: 1.0}_r_v 210
dv/dx1{power: 1.0} * u{power: 1.0}_r_v 1
dtype: int64
df_initial = df.copy() # The initial data is used for learning the Bayesian network
df.shape
(211, 21)
for col in df.columns:
if '_r' in col:
df = df.astype({col: "int64"})
df = df.astype({col: "str"}) # Discrete values are wrapped as strings to ensure proper functioning during learning
df.head()
| | C_u | v{power: 1.0} * u{power: 1.0}_u | u{power: 1.0}_u | v{power: 1.0}_u | v{power: 1.0} * dv/dx1{power: 1.0}_u | u{power: 1.0} * du/dx1{power: 1.0}_u | dv/dx1{power: 1.0}_u | du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u | du/dx1{power: 1.0} * v{power: 1.0}_u | du/dx1{power: 1.0}_r_u | ... | v{power: 1.0}_v | v{power: 1.0} * u{power: 1.0}_v | u{power: 1.0}_v | dv/dx1{power: 1.0} * v{power: 1.0}_v | dv/dx1{power: 1.0} * u{power: 1.0}_v | du/dx1{power: 1.0}_v | dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v | du/dx1{power: 1.0} * u{power: 1.0}_v | dv/dx1{power: 1.0}_r_v | dv/dx1{power: 1.0} * u{power: 1.0}_r_v |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.065667 | -0.027988 | 0.546189 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | -1 | ... | -0.837908 | 0.026033 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1 | 0 |
| 1 | 17.890508 | 0.000000 | 0.000000 | -0.906833 | -0.011958 | 0.0 | 0.0 | 0.0 | 0.0 | -1 | ... | -0.837908 | 0.026033 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1 | 0 |
| 2 | 0.065667 | -0.027988 | 0.546189 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | -1 | ... | -0.837908 | 0.026033 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1 | 0 |
| 3 | 17.890508 | 0.000000 | 0.000000 | -0.906833 | -0.011958 | 0.0 | 0.0 | 0.0 | 0.0 | -1 | ... | -0.837908 | 0.026033 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1 | 0 |
| 4 | 0.065667 | -0.027988 | 0.546189 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | -1 | ... | -0.837908 | 0.026033 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1 | 0 |
5 rows × 21 columns
df.groupby(df.columns.tolist(), as_index=False).size() # The number of distinct obtained systems
| | C_u | v{power: 1.0} * u{power: 1.0}_u | u{power: 1.0}_u | v{power: 1.0}_u | v{power: 1.0} * dv/dx1{power: 1.0}_u | u{power: 1.0} * du/dx1{power: 1.0}_u | dv/dx1{power: 1.0}_u | du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u | du/dx1{power: 1.0} * v{power: 1.0}_u | du/dx1{power: 1.0}_r_u | ... | v{power: 1.0} * u{power: 1.0}_v | u{power: 1.0}_v | dv/dx1{power: 1.0} * v{power: 1.0}_v | dv/dx1{power: 1.0} * u{power: 1.0}_v | du/dx1{power: 1.0}_v | dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v | du/dx1{power: 1.0} * u{power: 1.0}_v | dv/dx1{power: 1.0}_r_v | dv/dx1{power: 1.0} * u{power: 1.0}_r_v | size |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.719965 | 0.000000 | 0.008748 | 0.000000 | 0.000000 | 0.018813 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.253330 | 0.018645 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 1 |
| 1 | -0.388341 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.019710 | 0.154030 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.430548 | 0.000000 | 0.000000 | 0.000000 | -0.019533 | 0.000000 | -1 | 0 | 1 |
| 2 | -0.388341 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.019710 | 0.154030 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.253330 | 0.018645 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 1 |
| 3 | -0.388341 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.019710 | 0.154030 | 0.00000 | 0.000000 | -1 | ... | 0.025532 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 2 |
| 4 | -0.379967 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.019675 | 0.154793 | 0.00000 | 0.000000 | -1 | ... | 0.026033 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 1 |
| 5 | -0.142318 | -0.028009 | 0.562570 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 25.751746 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0 | -1 | 1 |
| 6 | -0.142318 | -0.028009 | 0.562570 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.523036 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -0.005488 | -1 | 0 | 2 |
| 7 | -0.142318 | -0.028009 | 0.562570 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.430548 | 0.000000 | 0.000000 | 0.000000 | -0.019533 | 0.000000 | -1 | 0 | 7 |
| 8 | -0.142318 | -0.028009 | 0.562570 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.183901 | 0.000000 | 0.013403 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 6 |
| 9 | -0.142318 | -0.028009 | 0.562570 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.253330 | 0.018645 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 18 |
| 10 | -0.142318 | -0.028009 | 0.562570 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.000000 | 0.000000 | 0.018619 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 1 |
| 11 | -0.142318 | -0.028009 | 0.562570 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.000000 | 0.026424 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 2 |
| 12 | -0.142318 | -0.028009 | 0.562570 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.025532 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 64 |
| 13 | -0.142318 | -0.028009 | 0.562570 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.000000 | 0.027226 | 0.000000 | 0.069275 | 0.000000 | 0.000000 | -1 | 0 | 4 |
| 14 | 0.065667 | -0.027988 | 0.546189 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.253993 | 0.018674 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 5 |
| 15 | 0.065667 | -0.027988 | 0.546189 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.000000 | 0.000000 | 0.018975 | 0.067130 | 0.000000 | 0.000000 | -1 | 0 | 1 |
| 16 | 0.065667 | -0.027988 | 0.546189 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.026033 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 47 |
| 17 | 0.065667 | -0.027988 | 0.546189 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.000000 | 0.027006 | 0.000000 | 0.064849 | 0.000000 | 0.000000 | -1 | 0 | 2 |
| 18 | 2.928678 | 0.000000 | 0.103895 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.025214 | -1 | ... | 0.025532 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 1 |
| 19 | 6.248662 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.076275 | 0.00000 | 0.025155 | -1 | ... | 0.025532 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 1 |
| 20 | 8.618788 | 0.000000 | 0.000000 | -0.439853 | 0.000000 | 0.012617 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.253993 | 0.018674 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 1 |
| 21 | 8.618788 | 0.000000 | 0.000000 | -0.439853 | 0.000000 | 0.012617 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.026033 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 4 |
| 22 | 8.768071 | 0.000000 | 0.000000 | -0.442624 | 0.000000 | 0.012593 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.000000 | 0.253330 | 0.018645 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 3 |
| 23 | 8.768071 | 0.000000 | 0.000000 | -0.442624 | 0.000000 | 0.012593 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.025532 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 15 |
| 24 | 11.653214 | 0.000000 | 0.000000 | -0.376289 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.016304 | -1 | ... | 0.025532 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 3 |
| 25 | 16.786657 | 0.000000 | 0.000000 | -0.785150 | 0.000000 | 0.000000 | 0.000000 | 0.02145 | 0.000000 | -1 | ... | 0.025532 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 6 |
| 26 | 17.890508 | 0.000000 | 0.000000 | -0.906833 | -0.011958 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.026033 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 4 |
| 27 | 18.056171 | -0.028170 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.797993 | 0.00000 | 0.000000 | -1 | ... | 0.025532 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 3 |
| 28 | 18.094896 | -0.007885 | 0.000000 | -0.653176 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.025532 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 1 |
| 29 | 18.113642 | 0.000000 | 0.000000 | -0.908095 | -0.011720 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | -1 | ... | 0.025532 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1 | 0 | 3 |
30 rows × 22 columns
all_r = df.shape[0]
unique_r = df.groupby(df.columns.tolist(),as_index=False).size().shape[0]
print(f'Out of {all_r} obtained systems, \033[1m{unique_r} are unique\033[0m ({int(unique_r / all_r * 100)} %)')
Out of 211 obtained systems, 30 are unique (14 %)
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 211 entries, 0 to 210
Data columns (total 21 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 C_u 211 non-null float64
1 v{power: 1.0} * u{power: 1.0}_u 211 non-null float64
2 u{power: 1.0}_u 211 non-null float64
3 v{power: 1.0}_u 211 non-null float64
4 v{power: 1.0} * dv/dx1{power: 1.0}_u 211 non-null float64
5 u{power: 1.0} * du/dx1{power: 1.0}_u 211 non-null float64
6 dv/dx1{power: 1.0}_u 211 non-null float64
7 du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u 211 non-null float64
8 du/dx1{power: 1.0} * v{power: 1.0}_u 211 non-null float64
9 du/dx1{power: 1.0}_r_u 211 non-null object
10 C_v 211 non-null float64
11 v{power: 1.0}_v 211 non-null float64
12 v{power: 1.0} * u{power: 1.0}_v 211 non-null float64
13 u{power: 1.0}_v 211 non-null float64
14 dv/dx1{power: 1.0} * v{power: 1.0}_v 211 non-null float64
15 dv/dx1{power: 1.0} * u{power: 1.0}_v 211 non-null float64
16 du/dx1{power: 1.0}_v 211 non-null float64
17 dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v 211 non-null float64
18 du/dx1{power: 1.0} * u{power: 1.0}_v 211 non-null float64
19 dv/dx1{power: 1.0}_r_v 211 non-null object
20 dv/dx1{power: 1.0} * u{power: 1.0}_r_v 211 non-null object
dtypes: float64(18), object(3)
memory usage: 36.3+ KB
discretizer = preprocessing.KBinsDiscretizer(n_bins=9, encode='ordinal', strategy='quantile') # Empirical parameter tuning
encoder = preprocessing.LabelEncoder()
p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)])
data, est = p.apply(df)
info_r = p.info
info_r
{'types': {'C_u': 'cont',
'v{power: 1.0} * u{power: 1.0}_u': 'cont',
'u{power: 1.0}_u': 'cont',
'v{power: 1.0}_u': 'cont',
'v{power: 1.0} * dv/dx1{power: 1.0}_u': 'cont',
'u{power: 1.0} * du/dx1{power: 1.0}_u': 'cont',
'dv/dx1{power: 1.0}_u': 'cont',
'du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u': 'cont',
'du/dx1{power: 1.0} * v{power: 1.0}_u': 'cont',
'du/dx1{power: 1.0}_r_u': 'disc',
'C_v': 'cont',
'v{power: 1.0}_v': 'cont',
'v{power: 1.0} * u{power: 1.0}_v': 'cont',
'u{power: 1.0}_v': 'cont',
'dv/dx1{power: 1.0} * v{power: 1.0}_v': 'cont',
'dv/dx1{power: 1.0} * u{power: 1.0}_v': 'cont',
'du/dx1{power: 1.0}_v': 'cont',
'dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v': 'cont',
'du/dx1{power: 1.0} * u{power: 1.0}_v': 'cont',
'dv/dx1{power: 1.0}_r_v': 'disc',
'dv/dx1{power: 1.0} * u{power: 1.0}_r_v': 'disc'},
'signs': {'C_u': 'neg',
'v{power: 1.0} * u{power: 1.0}_u': 'neg',
'u{power: 1.0}_u': 'pos',
'v{power: 1.0}_u': 'neg',
'v{power: 1.0} * dv/dx1{power: 1.0}_u': 'neg',
'u{power: 1.0} * du/dx1{power: 1.0}_u': 'pos',
'dv/dx1{power: 1.0}_u': 'pos',
'du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u': 'pos',
'du/dx1{power: 1.0} * v{power: 1.0}_u': 'pos',
'C_v': 'neg',
'v{power: 1.0}_v': 'neg',
'v{power: 1.0} * u{power: 1.0}_v': 'pos',
'u{power: 1.0}_v': 'pos',
'dv/dx1{power: 1.0} * v{power: 1.0}_v': 'pos',
'dv/dx1{power: 1.0} * u{power: 1.0}_v': 'pos',
'du/dx1{power: 1.0}_v': 'pos',
'dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v': 'neg',
'du/dx1{power: 1.0} * u{power: 1.0}_v': 'neg'}}
data.head()
| | C_u | v{power: 1.0} * u{power: 1.0}_u | u{power: 1.0}_u | v{power: 1.0}_u | v{power: 1.0} * dv/dx1{power: 1.0}_u | u{power: 1.0} * du/dx1{power: 1.0}_u | dv/dx1{power: 1.0}_u | du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u | du/dx1{power: 1.0} * v{power: 1.0}_u | du/dx1{power: 1.0}_r_u | ... | v{power: 1.0}_v | v{power: 1.0} * u{power: 1.0}_v | u{power: 1.0}_v | dv/dx1{power: 1.0} * v{power: 1.0}_v | dv/dx1{power: 1.0} * u{power: 1.0}_v | du/dx1{power: 1.0}_v | dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v | du/dx1{power: 1.0} * u{power: 1.0}_v | dv/dx1{power: 1.0}_r_v | dv/dx1{power: 1.0} * u{power: 1.0}_r_v |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 1 | 3 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 2 | 2 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 3 | 3 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| 4 | 2 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
5 rows × 21 columns
There are three types of Bayesian networks in BAMT: DiscreteBN, ContinuousBN, and HybridBN.
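A HybridBN is appropriate here because the table mixes continuous coefficient columns with discrete `_r` indicator columns. Judging by the learned structure below, `use_mixture=True` makes BAMT model continuous nodes as Gaussian mixtures (hence the MixtureGaussian node types), and `has_logit=True` allows classifier-based (logit) models for discrete nodes with continuous parents.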
bn = Nets.HybridBN(has_logit=True, use_mixture=True)
bn.add_nodes(info_r)
bn.get_info()
| | name | node_type | data_type | parents | parents_types |
|---|---|---|---|---|---|
| 0 | C_u | Gaussian | cont | [] | [] |
| 1 | v{power: 1.0} * u{power: 1.0}_u | Gaussian | cont | [] | [] |
| 2 | u{power: 1.0}_u | Gaussian | cont | [] | [] |
| 3 | v{power: 1.0}_u | Gaussian | cont | [] | [] |
| 4 | v{power: 1.0} * dv/dx1{power: 1.0}_u | Gaussian | cont | [] | [] |
| 5 | u{power: 1.0} * du/dx1{power: 1.0}_u | Gaussian | cont | [] | [] |
| 6 | dv/dx1{power: 1.0}_u | Gaussian | cont | [] | [] |
| 7 | du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u | Gaussian | cont | [] | [] |
| 8 | du/dx1{power: 1.0} * v{power: 1.0}_u | Gaussian | cont | [] | [] |
| 9 | du/dx1{power: 1.0}_r_u | Discrete | disc | [] | [] |
| 10 | C_v | Gaussian | cont | [] | [] |
| 11 | v{power: 1.0}_v | Gaussian | cont | [] | [] |
| 12 | v{power: 1.0} * u{power: 1.0}_v | Gaussian | cont | [] | [] |
| 13 | u{power: 1.0}_v | Gaussian | cont | [] | [] |
| 14 | dv/dx1{power: 1.0} * v{power: 1.0}_v | Gaussian | cont | [] | [] |
| 15 | dv/dx1{power: 1.0} * u{power: 1.0}_v | Gaussian | cont | [] | [] |
| 16 | du/dx1{power: 1.0}_v | Gaussian | cont | [] | [] |
| 17 | dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v | Gaussian | cont | [] | [] |
| 18 | du/dx1{power: 1.0} * u{power: 1.0}_v | Gaussian | cont | [] | [] |
| 19 | dv/dx1{power: 1.0}_r_v | Discrete | disc | [] | [] |
| 20 | dv/dx1{power: 1.0} * u{power: 1.0}_r_v | Discrete | disc | [] | [] |
variable_names = ['u', 'v']
df_temp = (df_initial[[col for col in df_initial.columns if '_r' in col]] != 0).copy()
print(df_temp.sum(axis=0).sort_values(ascending=False)[:len(variable_names)])
init_nodes_list = []
for i in range(len(variable_names)):
init_nodes = df_temp.sum(axis=0).idxmax()
init_nodes_list.append(init_nodes)
df_temp = df_temp.drop(init_nodes, axis=1)
print(init_nodes_list)
params = {"init_nodes": init_nodes_list, 'init_edges':[('du/dx1{power: 1.0}_r_u', 'v{power: 1.0} * u{power: 1.0}_u'), ('dv/dx1{power: 1.0}_r_v', 'v{power: 1.0} * u{power: 1.0}_v')],
'remove_init_edges':True}
du/dx1{power: 1.0}_r_u 211
dv/dx1{power: 1.0}_r_v 210
dtype: int64
['du/dx1{power: 1.0}_r_u', 'dv/dx1{power: 1.0}_r_v']
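As understood from the BAMT parameter names, `init_nodes` fixes the two right-hand-side indicator columns as parentless starting nodes of the search, `init_edges` seeds it with the expected links from each equation's right-hand side to its interaction term, and `remove_init_edges=True` lets the K2-guided search discard those seed edges if the data do not support them; the learned structure below indeed keeps both indicator nodes parentless.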
bn.add_edges(data, scoring_function=('K2', K2Score), params = params) # Structure Learning
bn.get_info()
| | name | node_type | data_type | parents | parents_types |
|---|---|---|---|---|---|
| 0 | v{power: 1.0} * dv/dx1{power: 1.0}_u | MixtureGaussian | cont | [] | [] |
| 1 | dv/dx1{power: 1.0}_u | MixtureGaussian | cont | [] | [] |
| 2 | du/dx1{power: 1.0} * dv/dx1{power: 1.0}_u | MixtureGaussian | cont | [] | [] |
| 3 | du/dx1{power: 1.0} * v{power: 1.0}_u | MixtureGaussian | cont | [] | [] |
| 4 | du/dx1{power: 1.0}_r_u | Discrete | disc | [] | [] |
| 5 | dv/dx1{power: 1.0} * u{power: 1.0}_v | MixtureGaussian | cont | [] | [] |
| 6 | du/dx1{power: 1.0}_v | MixtureGaussian | cont | [] | [] |
| 7 | dv/dx1{power: 1.0} * du/dx1{power: 1.0}_v | MixtureGaussian | cont | [] | [] |
| 8 | du/dx1{power: 1.0} * u{power: 1.0}_v | MixtureGaussian | cont | [] | [] |
| 9 | dv/dx1{power: 1.0}_r_v | Discrete | disc | [] | [] |
| 10 | v{power: 1.0} * u{power: 1.0}_u | ConditionalMixtureGaussian | cont | [du/dx1{power: 1.0}_r_u] | [disc] |
| 11 | dv/dx1{power: 1.0} * u{power: 1.0}_r_v | Discrete | disc | [dv/dx1{power: 1.0}_r_v] | [disc] |
| 12 | u{power: 1.0}_u | MixtureGaussian | cont | [v{power: 1.0} * u{power: 1.0}_u] | [cont] |
| 13 | v{power: 1.0}_u | MixtureGaussian | cont | [u{power: 1.0}_u] | [cont] |
| 14 | C_u | MixtureGaussian | cont | [v{power: 1.0} * u{power: 1.0}_u, u{power: 1.0... | [cont, cont] |
| 15 | u{power: 1.0} * du/dx1{power: 1.0}_u | MixtureGaussian | cont | [u{power: 1.0}_u, v{power: 1.0}_u] | [cont, cont] |
| 16 | C_v | ConditionalMixtureGaussian | cont | [C_u, u{power: 1.0}_u, dv/dx1{power: 1.0} * u{... | [cont, cont, disc] |
| 17 | v{power: 1.0}_v | MixtureGaussian | cont | [C_v] | [cont] |
| 18 | u{power: 1.0}_v | MixtureGaussian | cont | [C_v] | [cont] |
| 19 | v{power: 1.0} * u{power: 1.0}_v | MixtureGaussian | cont | [C_v, v{power: 1.0}_v] | [cont, cont] |
| 20 | dv/dx1{power: 1.0} * v{power: 1.0}_v | MixtureGaussian | cont | [C_v, v{power: 1.0} * u{power: 1.0}_v, u{power... | [cont, cont, cont] |
bn.plot('output_main_hunter_prey.html')
%%time
bn.fit_parameters(df_initial)
CPU times: total: 18.6 s Wall time: 5.98 s
def get_objects(synth_data):
    """
    Parameters
    ----------
    synth_data : pd.DataFrame
        The columns of the table are the structures (terms) of the obtained
        systems/equations; each row contains the coefficient of every term

    Returns
    -------
    objects_result : list of objects (combinations of equations or systems),
        keeping only the terms whose coefficients exceed the threshold
    """
    objects = synth_data.to_dict('records')  # one {term: coefficient} dict per sampled system
    objects_result = []
    for object_row in objects:
        # Discard terms with negligible coefficients
        object_res = {key: value for key, value in object_row.items() if abs(float(value)) > 0.01}
        objects_result.append(object_res)
    return objects_result
objects_res = []
sample_k = 30
while len(objects_res) < sample_k:
synth_data = bn.sample(200, as_df=True)
temp_res = get_objects(synth_data)
if len(temp_res) + len(objects_res) > sample_k:
objects_res += temp_res[:sample_k - len(objects_res)]
else:
objects_res += temp_res
list_correct_structures_unique = ['v{power: 1.0} * u{power: 1.0}_v', 'v{power: 1.0}_v', 'dv/dx1{power: 1.0}_r_v',
'v{power: 1.0} * u{power: 1.0}_u', 'u{power: 1.0}_u', 'du/dx1{power: 1.0}_r_u']
import re
import itertools
list_correct_structures = set()
for term in list_correct_structures_unique:
str_r = '_r' if '_r' in term else ''
str_elem = ''
if any(f'_{elem}' in term for elem in variable_names):
for elem in variable_names:
if f'_{elem}' in term:
term = term.replace(f'_{elem}', "")
str_elem = f'_{elem}'
    # Handle products of several factors: register every ordering of the factors
arr_term = re.sub('_r', '', term).split(' * ')
perm_set = list(itertools.permutations([i for i in range(len(arr_term))]))
for p_i in perm_set:
temp = " * ".join([arr_term[i] for i in p_i]) + str_r + str_elem
list_correct_structures.add(temp)
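For example, for the product term `v{power: 1.0} * u{power: 1.0}_v` the permuted form `u{power: 1.0} * v{power: 1.0}_v` is also registered, so sampled structures are matched regardless of the order of the factors.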
def out_red(text):
print("\033[31m {}".format(text), end='')
def out_green(text):
print("\033[32m {}".format(text), end='')
met, k_sys = 0, len(objects_res)
k_min = k_sys if k_sys < 5 else 5
for object_row in objects_res[:k_min]:
k_c, k_l = 0, 0
for col in df_initial.columns:
if col in object_row:
if col in list_correct_structures:
k_c += 1
out_green(f'{col}')
print(f'\033[0m:{object_row[col]}')
else:
k_l += 1
out_red(f'{col}')
print(f'\033[0m:{object_row[col]}')
print(f'correct structures = {k_c}/{len(list_correct_structures_unique)}')
print(f'incorrect structures = {k_l}')
print('--------------------------')
for object_row in objects_res:
for temp in object_row.keys():
if temp in list_correct_structures:
met += 1
print(f'average value (equation - {k_sys}) = {met / k_sys}')
C_u:-0.13706544031186782
v{power: 1.0} * u{power: 1.0}_u:-0.02796083044228613
u{power: 1.0}_u:0.5257918697164456
dv/dx1{power: 1.0}_u:0.016114912486107302
du/dx1{power: 1.0}_r_u:-1.0
v{power: 1.0}_v:-0.8356804923371564
v{power: 1.0} * u{power: 1.0}_v:0.025941183828859165
dv/dx1{power: 1.0}_r_v:-1.0
correct structures = 6/6
incorrect structures = 2
--------------------------
C_u:-0.1390545344562649
v{power: 1.0} * u{power: 1.0}_u:-0.027979225523030306
u{power: 1.0}_u:0.5397202971530966
dv/dx1{power: 1.0}_u:0.029346320517112234
du/dx1{power: 1.0}_r_u:-1.0
v{power: 1.0}_v:-0.8356853851794231
v{power: 1.0} * u{power: 1.0}_v:0.025941384620031628
dv/dx1{power: 1.0}_r_v:-1.0
correct structures = 6/6
incorrect structures = 2
--------------------------
C_u:-0.1447921439943488
v{power: 1.0} * u{power: 1.0}_u:-0.02803228675995169
u{power: 1.0}_u:0.5798973186059193
du/dx1{power: 1.0}_r_u:-1.0
v{power: 1.0}_v:-0.8356994984859185
v{power: 1.0} * u{power: 1.0}_v:0.025941963791211144
dv/dx1{power: 1.0}_r_v:-1.0
correct structures = 6/6
incorrect structures = 1
--------------------------
C_u:-0.14536311767132443
v{power: 1.0} * u{power: 1.0}_u:-0.02803756710559053
u{power: 1.0}_u:0.5838955031314497
dv/dx1{power: 1.0}_u:0.052687451197191856
du/dx1{power: 1.0}_r_u:-1.0
v{power: 1.0}_v:-0.8357009031768629
v{power: 1.0} * u{power: 1.0}_v:0.025942021449693667
dv/dx1{power: 1.0}_r_v:-1.0
correct structures = 6/6
incorrect structures = 2
--------------------------
C_u:-0.1400638508397519
v{power: 1.0} * u{power: 1.0}_u:-0.02798855965026343
u{power: 1.0}_u:0.546787931456705
du/dx1{power: 1.0}_r_u:-1.0
v{power: 1.0}_v:-0.8356878678029228
v{power: 1.0} * u{power: 1.0}_v:0.025941486498431917
dv/dx1{power: 1.0}_r_v:-1.0
correct structures = 6/6
incorrect structures = 1
--------------------------
average value (equation - 30) = 5.333333333333333
==================================
Source: https://github.com/stan-dev/example-models/blob/master/knitr/lotka-volterra/lotka-volterra-predator-prey.Rmd (line 558)
"Using a non-statistically motivated error term and optimization, Howard (2009) provides the following point estimates for the model parameters based on the data."
$\alpha$ = 0.55; $\beta$ = 0.028; $\gamma $ = 0.84; $\delta$ = 0.026
\begin{equation*}
\begin{cases}
\dfrac{\partial u}{\partial t} = 0.55 \cdot u - 0.028 \cdot u \cdot v, \\
\dfrac{\partial v}{\partial t} = - 0.84 \cdot v + 0.026 \cdot u \cdot v.
\end{cases}
\end{equation*}
$t \in [0, 20]$, $u_0 = 30$, $v_0 = 4$
The equations have periodic solutions. The numerical solution was obtained using scipy.integrate.odeint.
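A minimal sketch of how such a solution could be produced (the saved `.npy` files loaded below are assumed to come from a call like this; the grid size is an assumption):
import numpy as np
from scipy.integrate import odeint

def lotka_volterra(state, t, alpha=0.55, beta=0.028, gamma=0.84, delta=0.026):
    u, v = state  # prey and predator populations
    return [alpha * u - beta * u * v, -gamma * v + delta * u * v]

t_numerical = np.linspace(0, 20, 201)  # grid size is an assumption
# odeint returns shape (len(t_numerical), 2); storing the transpose matches
# the (2, N) layout expected by numerical_solution.T[:, 0] below
numerical_solution = odeint(lotka_volterra, [30.0, 4.0], t_numerical).T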
t_numerical = np.load(f'{path}t_numerical.npy')
numerical_solution = np.load(f'{path}numerical_solution.npy')
x_numerical = numerical_solution.T[:, 0] # Hare/Prey
y_numerical = numerical_solution.T[:, 1] # Lynx/Hunters
numerical_data = [x_numerical, y_numerical]
t_new = np.linspace(0, int(t_numerical[-1]), len(t)) # Shifting from the interval 1900-1920 to 0-20 for t with the same step size
fig = go.Figure()
fig.add_trace(go.Scatter(x=t_numerical, y=x_numerical, name='Numerical solution - u (Hare)', line=dict(color='firebrick', width=2)))
fig.add_trace(go.Scatter(x=t_numerical, y=y_numerical, name='Numerical solution - v (Lynx)', line=dict(color='black', width=2)))
fig.add_trace(go.Scatter(x=t_new, y=y,
                         name='Source data - u (Hare)',
                         line=dict(color='firebrick', dash='dash', width=2)))
fig.add_trace(go.Scatter(x=t_new, y=x,
                         name='Source data - v (Lynx)',
                         line=dict(color='black', dash='dash', width=2)))
fig.update_layout(title="Hunter-prey",
xaxis_title="Time",
yaxis_title="Population")
fig.show()
The 30 systems sampled from the Bayesian network are solved to estimate the uncertainty in the data domain.
max_deriv_order = 1  # 1 or 2
if max_deriv_order == 1:
set_solutions = torch.load(f'{path}file_u_main_first_derivative.pt')
equations = []
with open(f'{path}data_equations_30_first.pickle', 'rb') as f:
equations = pickle.load(f)
param = [np.linspace(0, 11, 201), ]
list_incor_sol = [5, 6, 13, 20, 26, 29]
else:
set_solutions = torch.load(f'{path}file_u_main_second_derivative.pt')
equations = []
with open(f'{path}data_equations_30_second.pickle', 'rb') as f:
equations = pickle.load(f)
param = [np.linspace(0, 11, 1001),]
list_incor_sol = [3, 4, 8, 14]
variable_names = ['u', 'v'] # list of objective function names
grid = grid_format_prepare(param, "mat")
num_steps = len(set_solutions)
fig = go.Figure(data=[go.Scatter(x = grid.reshape(-1), y=set_solutions[0][:, 0], name='u_autograd'),
go.Scatter(x = grid.reshape(-1), y=set_solutions[0][:, 1], name='v_autograd')])
frames=[]
for i in range(0, len(set_solutions)):
frames.append(go.Frame(name=str(i),
data=[go.Scatter(x=grid.reshape(-1), y=set_solutions[i, :, 0], name='u_autograd'),
go.Scatter(x=grid.reshape(-1), y=set_solutions[i, :, 1], name='v_autograd')]))
steps = []
for i in range(num_steps):
step = dict(
label = f'{i}',
method = "animate",
args = [[str(i)]]
)
steps.append(step)
sliders = [dict(
currentvalue = {"prefix": "Уравнение №", "font": {"size": 14}},
len = 1,
x = 0,
pad = {"b": 10, "t": 50},
steps = steps,
)]
# In the general case, the title should be set explicitly
fig.update_layout(title="Hunters - Prey",
xaxis_title="Time t",
yaxis_title="Population",
legend=dict(yanchor="top", y=0.99, xanchor="right", x=0.99
))
fig.layout.sliders = sliders
fig.frames = frames
fig.show()
def token_check_obj(columns, object_row):
list_correct_structures = list_correct_structures_unique
k_c, k_l = 0, 0
for col in columns:
if col in object_row:
if col in list_correct_structures:
k_c += 1
else:
k_l += 1
return k_c, k_l
def plot_token_check_obj(columns, object_row):
list_correct_structures = list_correct_structures_unique
k_c, k_l = 0, 0
for col in columns:
if col in object_row:
if col in list_correct_structures:
k_c += 1
out_green(f'{col}')
print(f'\033[0m:{object_row[col]}')
else:
k_l += 1
out_red(f'{col}')
print(f'\033[0m:{object_row[col]}')
    print(f'correct structures = {k_c}/{len(list_correct_structures)}')
print(f'incorrect structures = {k_l}')
After obtaining the solutions, they can be examined visually and those that clearly do not fit can be excluded (expert knowledge). Given the nature of the studied process, the interaction between predators and prey, the solutions cannot enter the negative region or remain constant.
The confidence interval for the mean with unknown variance is calculated using the formula:
$$\left(\overline{x} - \frac{S}{\sqrt{n}} C_{\frac{\alpha}{2}};\ \overline{x} + \frac{S}{\sqrt{n}} C_{\frac{\alpha}{2}}\right)$$
where:
$\overline{x}$ - sample mean;
$S$ - sample standard deviation;
$n$ - sample size;
$C_{\frac{\alpha}{2}}$ - the quantile of the corresponding distribution;
$\frac{S}{\sqrt{n}} C_{\frac{\alpha}{2}}$ - the maximum estimation error.
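The factor 1.96 hard-coded in the cell below is the standard normal quantile $C_{\frac{\alpha}{2}}$ for $\alpha = 0.05$; a small sketch of obtaining it with scipy (a Student's t alternative for the small sample is shown as a comment):
import scipy.stats as st

alpha = 0.05
coef = st.norm.ppf(1 - alpha / 2)  # ≈ 1.96, the value hard-coded below
# For the ~24 retained solutions, Student's t would give a slightly wider interval:
# coef = st.t.ppf(1 - alpha / 2, df=len(set_solutions) - 1)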
# Exclude the visually incorrect solutions identified above
set_solutions_new = np.array([sol for i, sol in enumerate(set_solutions) if i not in list_incor_sol])
temp = set_solutions.copy()
set_solutions = set_solutions_new.copy()
# Pointwise statistics across the retained solutions (axis 0 enumerates the sampled systems)
mean_arr = set_solutions.mean(axis=0)
var_arr = set_solutions.var(axis=0)
s_arr = set_solutions.std(axis=0, ddof=1)  # sample standard deviation, cf. statistics.stdev()
mean_tens = torch.from_numpy(mean_arr)
var_tens = torch.from_numpy(var_arr)
s_arr = torch.from_numpy(s_arr)
# Confidence region for the mean
upper_bound = mean_tens + 1.96 * s_arr / math.sqrt(len(set_solutions))
lower_bound = mean_tens - 1.96 * s_arr / math.sqrt(len(set_solutions))
confidence_region = torch.cat((upper_bound, torch.flip(lower_bound, dims=(0,))), 0)
confidence_grid = torch.cat((grid.reshape(-1), torch.flip(grid.reshape(-1), dims=(0,))), 0)
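Concatenating the upper bound with the reversed lower bound (and the grid with its reverse) traces a closed polygon, which is exactly what Plotly's fill='toself' needs to shade the confidence band in the figure below.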
# case: if dimensionality = 0 - [t, ] and (variable_names = [u, v] or [u, v, ..]
fig = go.Figure()
for n, param in enumerate(variable_names):
color = list(np.random.choice(range(256), size=3))
# fig.add_trace(go.Scatter(x=t_new, y=source_data[n].reshape(-1), name=f'Source data - {param}', line=dict(color='firebrick', width=4)))
fig.add_trace(go.Scatter(x=t_numerical, y=numerical_data[n].reshape(-1), name=f'Initial data - {param}', line=dict(color='firebrick', width=4)))
fig.add_trace(go.Scatter(x=grid.reshape(-1), y=mean_tens[:, n],
name=f'Solution field (mean) - {param}',
line_color=f'rgb({color[0]},{color[1]},{color[2]})',
line=dict(dash='dash')))
fig.add_trace(go.Scatter(x = confidence_grid, y = confidence_region[:, n],
fill='toself',
fillcolor=f'rgba({color[0]},{color[1]},{color[2]},0.2)',
line_color='rgba(255,255,255,0)',
name=f'Confidence region - {param}'))
fig.update_layout(title="hunter_prey",
xaxis=dict(range=[0,11]),
xaxis_title="Time",
yaxis_title="Population")
fig.show()